package com.news.util;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular-expression utility that extracts image addresses
 * (the src attribute values of img tags) from HTML source code.
 * @author Administrator
 *
 */

public class ImgFliter {
	/**
	 * Matches an {@code <img ...>} tag containing a {@code src} attribute whose value
	 * ends in one of the listed image extensions (case-insensitive).
	 * Group 1 is the optional opening quote, group 2 is the captured address.
	 * Compiled once because {@link Pattern} instances are immutable and reusable.
	 */
	private static final Pattern IMG_SRC_PATTERN = Pattern.compile("<img\\b[^>]*\\bsrc\\b\\s*=\\s*('|\")?([^'\"\n\r\f>]+(\\.jpg|\\.bmp|\\.eps|\\.gif|\\.mif|\\.miff|\\.png|\\.tif|\\.tiff|\\.svg|\\.wmf|\\.jpe|\\.jpeg|\\.dib|\\.ico|\\.tga|\\.cut|\\.pic)\\b)[^>]*>", Pattern.CASE_INSENSITIVE);

	/**
	 * Prints every image address found in the given HTML to standard output,
	 * one per line, each prefixed with {@code "fliter:  "}.
	 *
	 * @param htmlCode HTML source to scan; {@code null} prints nothing
	 */
	public static void fliter(String htmlCode) {
		for (String src : getImageSrc(htmlCode)) {
			System.out.println("fliter:  " + src);
		}
	}

	/**
	 * Collects the {@code src} values of all {@code <img>} tags in the given HTML
	 * whose address ends in a recognised image extension.
	 *
	 * @param htmlCode HTML source to scan; may be {@code null}
	 * @return a new mutable list of addresses in document order; empty (never
	 *         {@code null}) when nothing matches or {@code htmlCode} is {@code null}
	 */
	public static List<String> getImageSrc(String htmlCode) {
		List<String> imageSrcList = new ArrayList<String>();
		if (htmlCode == null) {
			// Pattern.matcher(null) would throw a NullPointerException.
			return imageSrcList;
		}
		Matcher m = IMG_SRC_PATTERN.matcher(htmlCode);
		while (m.find()) {
			String src = m.group(2);
			if (m.group(1) == null) {
				// Unquoted value: the greedy capture may run into later attributes,
				// so keep only the first whitespace-delimited token.
				// NOTE(review): that token is not re-checked against the extension list.
				src = src.split("\\s+")[0];
			}
			imageSrcList.add(src);
		}
		return imageSrcList;
	}

}
